Classifying Images by Blobs and Corners

Connect a client to the cluster


In [1]:
%matplotlib inline
from IPython.parallel import Client
# Client handle for the IPython parallel cluster. It is created with no
# arguments, so it relies on the library's default connection settings.
c = Client()

Define the imports and the per-image processing function on every engine and locally (slow!)


In [2]:
%%px --local
import numpy as np
import pandas as pd
import os
from os import path
from skimage import io
from skimage.io import ImageCollection
from skimage.exposure.exposure import equalize_hist
from skimage.color.colorconv import rgb2gray
from skimage.feature.blob import blob_log
from skimage.feature.corner import corner_peaks, corner_harris
import cv2

# Root directory of the data set (hard-coded absolute path).
root_path = "/kaggle/retina"

# Training-data directories: the training root and its 'sample' sub-directory.
train_path = path.join(root_path, 'train')
sample_train = path.join(train_path, 'sample')

# Labels table in CSV representation; header=0 takes the column names from
# the first row of the file. process_single_image reads its 'image' and
# 'level' columns.
labels_file = path.join(root_path, "trainLabels.csv")
labels = pd.read_csv(labels_file, header=0)

def get_image_name(file_name):
    """Return the base name of ``file_name`` with its directory part and
    last extension removed (e.g. ``'/a/b/457_left.jpeg'`` -> ``'457_left'``).
    """
    base_name = path.basename(file_name)
    stem, _extension = path.splitext(base_name)
    return stem

def process_single_image(file_name):
    """Count blobs and corners in one image and pair them with its label.

    The image is read from disk, converted to grayscale and
    histogram-equalized. Blobs are detected with ``blob_log`` and corners
    with ``corner_peaks`` applied to the ``corner_harris`` response.

    Parameters
    ----------
    file_name : str
        Path of the image file. Its base name without extension is the key
        looked up in the ``image`` column of the module-level ``labels``
        table.
        NOTE(review): ``rgb2gray`` is applied unconditionally, so the file is
        presumably always an RGB image -- confirm.

    Returns
    -------
    numpy.ndarray
        1-D array ``[number of blobs, number of corners, level]``.

    Raises
    ------
    IndexError
        If ``labels`` contains no row for this image.
    """
    image = io.imread(file_name)
    image_gray = equalize_hist(rgb2gray(image))

    # One row per detected blob / corner, so shape[0] below is the count.
    blobs = blob_log(image_gray, max_sigma=30, threshold=.1)
    corners = corner_peaks(corner_harris(image_gray), min_distance=2)

    image_name = get_image_name(file_name)
    # Single .loc lookup instead of chained boolean indexing.
    levels = labels.loc[labels['image'] == image_name, 'level']
    if levels.empty:
        # Same exception type as the bare .iloc[0] would raise, but with a
        # message that identifies the offending image.
        raise IndexError("no entry for image %r in labels" % (image_name,))
    level = levels.iloc[0]

    return np.array([blobs.shape[0], corners.shape[0], level])

Test on a single file


In [13]:
# Hard-coded directory containing the test image.
# NOTE(review): this path is spelled with different letter case than
# root_path ('/kaggle/retina'); the two only refer to the same tree on a
# case-insensitive file system -- confirm which spelling is intended.
in_path = path.normpath('/Kaggle/Retina/train/raw')

file_name = path.join(in_path, '457_left.jpeg')
# Run the per-image function locally; the cell displays the returned
# [blob count, corner count, level] array.
process_single_image(file_name)


Out[13]:
array([  0, 857,   0], dtype=int64)

Now on the cluster


In [ ]:
# Direct view over all engines of the cluster.
dv = Client()[:]
# os.listdir returns entries in arbitrary order; sort them so that "the first
# half" below is the same set of files on every run.
files = [path.join(sample_train, f) for f in sorted(os.listdir(sample_train))]
lenf = len(files)
# Only the first half of the files is processed here. Floor division keeps
# the slice index an integer on Python 3 as well as Python 2.
asr = dv.map(process_single_image, files[:lenf // 2])
# Collect every per-image row, then stack once: this avoids the quadratic
# copying of a pairwise vstack fold and does not need the `reduce` builtin,
# which Python 3 removed.
final = np.vstack(list(asr))
# NOTE: ndarray.tofile writes raw values only (no shape or dtype), so a reader
# must already know the layout: one row of 3 integers per image.
final.tofile('/users/boris/Dropbox/Kaggle/retina/Blobs_Corners_0.bin')